#!/bin/bash
#set -x 

# Usage text printed when the script is called with too few arguments.
USAGE="
$(basename "$0") <REF2TID_MAP> <TAXONOMY_TREE> <BIN_DIR> <INDEX_DIR> <KMERSIZE>

ARGUMENT
    <REF2TID_MAP>    <FILE>        reference file path mapped to taxonomy id.
    <TAXONOMY_TREE>  <FILE>        path of taxonomy tree (nodes.dmp).
    <BIN_DIR>        <DIR>         directory of deSPI's executable files.
    <INDEX_DIR>      <DIR>         directory for deSPI's index.
    <KMERSIZE>       <INT>         kmer size for building Debruijn graph [0, 31].
"
# All five positional arguments are required: the fifth one is the k-mer
# size that is handed to both `jdbg count -m` and `deSPI index -k`.
if [ $# -lt 5 ]
then
	echo "ERROR: 5 required arguments are needed, only input $#" >&2
	# Print the usage as data, never as a printf format string.
	printf '%s' "$USAGE" >&2
	exit 1
fi

#######################################
# Check that the deSPI executables are present in a directory.
# Arguments: $1 - directory expected to contain the files `jdbg` and `deSPI`
# Outputs:   an error message on stderr when either file is missing
# Returns:   0 if both are regular files, 1 otherwise
#######################################
check_for_bins() {
	# Keep the argument local so the caller's DESPI_BIN_DIR is not overwritten.
	local bin_dir=$1
	if [ -f "$bin_dir/jdbg" ] && [ -f "$bin_dir/deSPI" ]
	then
		return 0
	fi
	echo "ERROR: Program JDBG or deSPI doesn't exist" >&2
	return 1
}

# Resolve the positional arguments to absolute paths so they stay valid
# after the script changes its working directory.
REFERENCE_PATH=$(readlink -f -- "$1")
TAXONOMY_TREE_PATH=$(readlink -f -- "$2")
DESPI_BIN_DIR=$(readlink -f -- "$3")
KMER_SIZE=$5

# Create the index directory BEFORE resolving it: `readlink -f` prints
# nothing when an intermediate component does not exist, which would leave
# INDEX_DIR empty and turn the derived paths below into /tmp and
# /.abo_ref_path.
if ! mkdir -p -- "$4"
then
	echo "ERROR: cannot create index directory '$4'" >&2
	exit 1
fi
INDEX_DIR=$(readlink -f -- "$4")
if [ -z "$INDEX_DIR" ]
then
	echo "ERROR: cannot resolve index directory '$4'" >&2
	exit 1
fi

# Scratch directory for k-mer counting and the rewritten reference list,
# both kept inside the index directory.
TMPDIR="$INDEX_DIR/tmp"
ABOREFPATH="$INDEX_DIR/.abo_ref_path"

# Main pipeline:
#   1. validate the input files,
#   2. rewrite the reference list with absolute paths into $ABOREFPATH,
#   3. unless ./database.jdb already exists, count k-mers with jdbg inside
#      $TMPDIR and move the result to $INDEX_DIR/database.jdb,
#   4. build the deSPI index and the taxonomy map in $INDEX_DIR.
# NOTE(review): an existing database.jdb is looked up relative to the
# current working directory, while a freshly built one is placed in
# $INDEX_DIR (the script then continues from $INDEX_DIR). Re-using a
# database therefore presumably requires running from $INDEX_DIR - confirm
# whether the lookup should use "$INDEX_DIR/database.jdb" instead.
if check_for_bins "$DESPI_BIN_DIR"
then

	# $TMPDIR is derived from $INDEX_DIR and removed recursively below, so
	# refuse to continue with an empty index directory.
	if [ -z "$INDEX_DIR" ]
	then
		echo "ERROR: index directory could not be resolved" >&2
		exit 1
	fi

	if [ ! -f "$TAXONOMY_TREE_PATH" ]
	then
		echo "evolutionary tree missing error, please download it from NCBI" >&2
		# Explicit non-zero status; a bare `exit` would report success here.
		exit 1
	fi
	if [ ! -f "$REFERENCE_PATH" ]
	then
		echo "Please specify the file contains path of reference and its taxonomy id" >&2
		exit 1
	fi

	# Decide up front whether the k-mer database has to be (re)built.
	NEED_DATABASE=0
	if [ ! -f "database.jdb" ]
	then
		NEED_DATABASE=1
	fi

	# Rewrite the reference list so every path is absolute; relative entries
	# are interpreted relative to the directory of the reference list itself.
	# This is done unconditionally because $ABOREFPATH is also an argument
	# of `deSPI index`, not only of `jdbg count`.
	ABSPATHPREF=$(dirname "$REFERENCE_PATH")
	FILE_SIZE=0
	if ! : > "$ABOREFPATH"
	then
		echo "ERROR: cannot write $ABOREFPATH" >&2
		exit 1
	fi
	# `|| [ -n "$fn" ]` keeps a final line that lacks a trailing newline.
	while read -r fn tid || [ -n "$fn" ]
	do
		# Skip blank lines.
		[ -n "$fn" ] || continue
		if [[ "$fn" != /* ]]
		then
			fn="$ABSPATHPREF/$fn"
		fi
		printf '%s %s\n' "$fn" "$tid" >> "$ABOREFPATH"

		# Sum the reference sizes (bytes) to size the jdbg hash; only needed
		# when the database is going to be built.
		if [ "$NEED_DATABASE" -eq 1 ]
		then
			if [ -f "$fn" ] && [ -r "$fn" ]
			then
				REF_BYTES=$(wc -c < "$fn")
				FILE_SIZE=$((FILE_SIZE + REF_BYTES))
			else
				echo "WARNING: reference file not readable: $fn" >&2
			fi
		fi
	done < "$REFERENCE_PATH"

	if [ "$NEED_DATABASE" -eq 1 ]
	then
		# Hash size is 115% of the total reference size, capped at 2e9.
		JELLYFISH_HASH_SIZE=$((FILE_SIZE * 115 / 100))
		echo "$JELLYFISH_HASH_SIZE"
		if [ "$JELLYFISH_HASH_SIZE" -gt 2000000000 ]
		then
			JELLYFISH_HASH_SIZE=2000000000
		fi

		# Work inside the scratch directory; abort if it cannot be entered so
		# that nothing is written to (or later deleted from) another place.
		if ! mkdir -p -- "$TMPDIR" || ! cd -- "$TMPDIR"
		then
			echo "ERROR: cannot enter temporary directory $TMPDIR" >&2
			exit 1
		fi

		if [ ! -f database.jdb ]
		then
			# Skip counting when a mer_counts_0 file is already present.
			if [ ! -f "mer_counts_0" ]
			then
				echo "counting kmers, hash size:$JELLYFISH_HASH_SIZE"
				if ! "$DESPI_BIN_DIR"/jdbg count -m "$KMER_SIZE" -s "$JELLYFISH_HASH_SIZE" -t 4 -C -T "$TAXONOMY_TREE_PATH" "$ABOREFPATH"
				then
					echo "jdbg failed to count kmers" >&2
					exit 1
				fi
			fi

			# More than one mer_counts_* file has to be merged; a single one
			# is used directly.
			if [ -e "mer_counts_1" ]
			then
				if ! "$DESPI_BIN_DIR"/jdbg merge -o database.jdb.tmp -T "$TAXONOMY_TREE_PATH" mer_counts_*
				then
					echo "jdbg failed to merge kmers" >&2
					exit 1
				fi
			else
				if [ -f "mer_counts_0" ]
				then
					mv mer_counts_0 database.jdb.tmp
				else
					echo "jdbg failed to count kmers" >&2
					exit 1
				fi
			fi

			if [ -f "database.jdb.tmp" ]
			then
				if ! mv database.jdb.tmp "$INDEX_DIR/database.jdb"
				then
					echo "ERROR: cannot move database.jdb into $INDEX_DIR" >&2
					exit 1
				fi
				echo "jdbg finished kmer counting"
			else
				echo "jdbg failed to count kmers" >&2
				exit 1
			fi
		fi

		# Leave the scratch directory before deleting it; the index step
		# below then runs from $INDEX_DIR, where database.jdb was placed.
		if ! cd -- "$INDEX_DIR"
		then
			echo "ERROR: cannot enter index directory $INDEX_DIR" >&2
			exit 1
		fi
		rm -rf -- "${TMPDIR:?}"
	else
		echo "found database.jdb"
	fi

	echo "building deSPI index......"
	if [ -f database.jdb ]
	then
		if ! "$DESPI_BIN_DIR"/deSPI index -k "$KMER_SIZE" database.jdb "$TAXONOMY_TREE_PATH" "$ABOREFPATH" "$INDEX_DIR"
		then
			echo "deSPI failed to build the index" >&2
			exit 1
		fi
		# NOTE(review): the exit status of convert2map is not checked, and
		# check_for_bins does not verify that convert2map exists - confirm
		# whether a failure here should abort the script.
		"$DESPI_BIN_DIR"/convert2map "$TAXONOMY_TREE_PATH" species > "$INDEX_DIR/map"
	else
		# Do not report success when there is no database to index.
		echo "ERROR: database.jdb not found, cannot build the index" >&2
		exit 1
	fi

	# The rewritten reference list is only an intermediate file.
	rm -f -- "$ABOREFPATH"
	echo "deSPI finished building deSPI index ^_^"
else
	echo "jdbg or deSPI missing error, re-download deSPI package to find" >&2
	exit 1
fi
cd ..



